//
//  yolov2Detection.m
//  yolov2Detect
//
//  Created by riddick on 2019/3/30.
//  Copyright © 2019 riddick. All rights reserved.
//

#include "yolov2Detection.h"

// Default constructor. Intentionally empty: all members are set up
// lazily in init(), which must be called before any detection.
CDetectObject::CDetectObject(){
    
}

// Destructor. Intentionally empty.
// NOTE(review): assumes ARC releases the Objective-C members
// (Model, option, config) — confirm the target's build settings.
CDetectObject::~CDetectObject(){
    
}

// Loads class labels from a plain-text file, one label per line,
// appending them to `classes` (existing contents are preserved).
// Returns 0 on success, -1 if the file cannot be opened.
int CDetectObject::loadClasstxt(const std::string& classtxtPath, std::vector<std::string>& classes){
    std::ifstream fin(classtxtPath);
    if(!fin.is_open()){
        printf("Error! Failed to open classtxt file!\n");
        return -1;
    }
    
    std::string line;
    while (getline(fin, line)) {
        // Strip a trailing '\r' so label files saved with Windows (CRLF)
        // line endings do not yield labels carrying an invisible carriage
        // return (which would corrupt display and name comparisons).
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        classes.push_back(line);
    }
    
    return 0;
}

// Initializes the detector: CoreML prediction options, model configuration,
// the yolo model itself, detection thresholds, anchor boxes and class labels.
// useCpuOnly   : forces CPU-only inference when YES.
// computeUnit  : preferred compute units (CPU / GPU / Neural Engine).
// classtxtPath : path to a text file with one class label per line.
// scaleSize    : input resolution the network expects.
// Returns 0 on success, -1 on failure (model load or label-file load).
int CDetectObject::init(const BOOL useCpuOnly, const MLComputeUnits computeUnit, const std::string& classtxtPath, const cv::Size& scaleSize){
    
    //init prediction options
    option = [[MLPredictionOptions alloc] init];
    option.usesCPUOnly = useCpuOnly;
    
    //init model configuration
    config = [[MLModelConfiguration alloc] init];
    config.computeUnits = computeUnit;
    
    NSError* err = nil;
    Model = [[yoloModel alloc] initWithConfiguration:config error:&err];
    if (Model == nil) {
        // Surface the load failure instead of silently continuing with a
        // nil model (every later prediction would quietly return nothing).
        NSLog(@"Error! Failed to load yolo model: %@", err);
        return -1;
    }
    
    //init detection parameters
    inputSize = scaleSize;
    maxBoundingBoxes = 10;
    confidenceThreshold = 0.5;
    nmsThreshold = 0.6;
    // YOLOv2 anchor boxes: 5 (width, height) pairs in grid-cell units.
    anchors = {0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828};
    
    //load labels
    return loadClasstxt(classtxtPath, classes);
}


// Runs the CoreML model on a preprocessed image tensor and returns the raw
// output feature map, or nil if the prediction fails.
MLMultiArray* CDetectObject::predictImageScene(const cv::Mat& imgTensor) {
    // Convert the cv::Mat into a CVPixelBuffer the model can consume.
    // NOTE(review): assumes PixelBufferPool keeps ownership of `buffer`
    // (released by its destructor) — confirm no CVPixelBufferRelease is
    // needed here.
    PixelBufferPool mat2pixelbuffer;
    CVPixelBufferRef buffer = mat2pixelbuffer.GetPixelBuffer(imgTensor);
    
    //predict from image
    NSError *error = nil;
    yoloModelInput  *input = [[yoloModelInput alloc] initWithInput__0:buffer];
    yoloModelOutput *output = [Model predictionFromFeatures:input options:option error:&error];
    if (output == nil) {
        // Per Cocoa convention, check the returned object (not the error
        // pointer) to detect failure, then report the error.
        NSLog(@"Error! Prediction failed: %@", error);
        return nil;
    }
    
    return output.output__0;
}


// Resizes `image` into `inputImage` at the network input resolution
// (inputSize, set in init()).
// NOTE(review): no color-space conversion is performed — assumes the model
// was trained on the same channel order the cv::Mat carries (typically
// BGR); confirm against the training pipeline.
void CDetectObject::preprocessImage(const cv::Mat& image, cv::Mat& inputImage){
    cv::resize(image, inputImage, inputSize);
}


// Logistic sigmoid: squashes a raw score into the open interval (0, 1).
float CDetectObject::sigmoid(float value){
    const double denom = 1 + exp(-value);
    return 1 / denom;
}

// In-place softmax over `weightList`.
// The maximum element is subtracted before exponentiating — the standard
// numerical-stability trick: exp() of a large logit overflows to +inf,
// which would turn every probability into NaN. The subtraction cancels in
// the normalization, so the result is mathematically unchanged.
void CDetectObject::mySoftMax( std::vector<float>& weightList){
    if (weightList.empty()) {
        return; // nothing to normalize; avoids a 0/0 below
    }
    const float maxVal = *std::max_element(weightList.begin(), weightList.end());
    
    float sumWeight = 0.0f;
    for (size_t i = 0; i < weightList.size(); i++)
    {
        const float expVal = exp(weightList[i] - maxVal);
        sumWeight += expVal;
        weightList[i] = expVal;
    }
    
    for (size_t i = 0; i < weightList.size(); i++)
    {
        weightList[i] /= sumWeight;
    }
}


// In-place softmax over `classes`, implemented with std::transform.
// The maximum score is subtracted before exponentiating for numerical
// stability (prevents exp() overflow to +inf / NaN for large logits);
// the offset cancels in the normalization, so results are unchanged.
void CDetectObject::softmax(std::vector<float>& classes){
    if (classes.empty()) {
        return; // nothing to normalize; avoids a 0/0 below
    }
    const float maxScore = *std::max_element(classes.begin(), classes.end());
    
    // First pass: exponentiate shifted scores and accumulate their sum.
    float sum = 0;
    std::transform(classes.begin(), classes.end(), classes.begin(), [&sum, maxScore](float score)->float{
        float exp_score = exp(score - maxScore);
        sum += exp_score;
        return exp_score;
    });
    
    // Second pass: normalize so the entries sum to 1.
    std::transform(classes.begin(), classes.end(), classes.begin(), [sum](float score)->float{
        return score/sum;
    });
}

// Decodes the raw YOLOv2 output tensor into parallel vectors of class ids,
// combined confidence scores and pixel-space bounding rects (candidates for
// NMS; no suppression happens here).
// Tensor layout taken from feature.shape: [channels, gridHeight, gridWidth],
// with boxesPerCell * (numClasses + 5) channels; feature.strides gives the
// element step per dimension into the flat backing buffer.
void CDetectObject::parseFeature(MLMultiArray* feature, std::vector<int>& ids, std::vector<float>& confidences, std::vector<cv::Rect>& boxes){
    
    NSArray<NSNumber*>* featureShape = feature.shape;
    int d0 = [[featureShape objectAtIndex:0] intValue]; // channel count (read but unused below)
    int d1 = [[featureShape objectAtIndex:1] intValue]; // grid height
    int d2 = [[featureShape objectAtIndex:2] intValue]; // grid width
    
    // Element strides for each dimension of the flat buffer.
    int stride0 = [feature.strides[0] intValue];
    int stride1 = [feature.strides[1] intValue];
    int stride2 = [feature.strides[2] intValue];
    
    int blockSize = 32; // pixels per grid cell in the network input image
    int gridHeight = d1;
    int gridWidth = d2;
    int boxesPerCell = 5;//Int(anchors.count/5)
    int numClasses = (int)classes.size();
    
    // NOTE(review): assumes the multiarray's element type is double —
    // confirm against the model's declared output dtype before trusting
    // this cast.
    double* pdata = (double*)feature.dataPointer;
    
    for (int cy =0; cy< gridHeight; cy++){
        for (int cx =0; cx< gridWidth; cx++){
            for (int b=0; b<boxesPerCell; b++){
                // Each anchor box occupies (numClasses + 5) consecutive
                // channels: tx, ty, tw, th, tc, then numClasses class scores.
                int channel = b*(numClasses + 5);
                
                // Offset of this grid cell within a single channel plane.
                int laterId= cx*stride2+cy*stride1;
                float tx = (float)pdata[channel*stride0 + laterId];
                float ty = (float)pdata[(channel+1)*stride0 + laterId];
                float tw = (float)pdata[(channel+2)*stride0 + laterId];
                float th = (float)pdata[(channel+3)*stride0 + laterId];
                float tc = (float)pdata[(channel+4)*stride0 + laterId];
                
                // The predicted tx and ty coordinates are relative to the location
                // of the grid cell; we use the logistic sigmoid to constrain these
                // coordinates to the range 0 - 1. Then we add the cell coordinates
                // and multiply by the number of pixels per grid cell (blockSize).
                // Now x and y represent the center of the bounding box in the
                // network input image space.
                float x = (float(cx) + sigmoid(tx)) * blockSize;
                float y = (float(cy) + sigmoid(ty)) * blockSize;
                
                // The size of the bounding box, tw and th, is predicted relative to
                // the size of an "anchor" box. Here we also transform the width and
                // height into the network input image space.
                float w = exp(tw) * anchors[2*b] * blockSize;
                float h = exp(th) * anchors[2*b + 1] * blockSize;
                
                // The confidence value for the bounding box is given by tc. We use
                // the logistic sigmoid to turn this into a percentage.
                float confidence = sigmoid(tc);
                // Gather the raw class scores for this box and normalize them.
                std::vector<float> classesProb(numClasses);
                for (int i = 0; i < numClasses; ++i) {
                    int offset = (channel+5+i)*stride0 + laterId;
                    classesProb[i] =  (float)pdata[offset];
                }
                softmax(classesProb);
                
                // Find the index of the class with the largest score.
                auto max_itr = std::max_element(classesProb.begin(), classesProb.end());
                int index = int(max_itr - classesProb.begin());

                // Combine the confidence score for the bounding box, which tells us
                // how likely it is that there is an object in this box (but not what
                // kind of object it is), with the largest class prediction, which
                // tells us what kind of object it detected (but not where).
                float confidenceInClass = classesProb[index] * confidence;
                // NOTE(review): the filter below thresholds the raw objectness
                // `confidence`, while the commented-out line suggests the combined
                // `confidenceInClass` was once intended — confirm which criterion
                // is desired before changing it.
                if(confidence>confidenceThreshold){
                // Since we compute many candidate bounding boxes, we only want to
                // keep the ones whose score is over a certain threshold.
                //if (confidenceInClass > confidenceThreshold){
                    // Convert center/size to a top-left-anchored rect. Note the
                    // Rect2d is narrowed to integer cv::Rect on push_back.
                    cv::Rect2d rect =cv::Rect2d(float(x-w*0.5), float(y-h*0.5), float(w), float(h));
                    ids.push_back(index);
                    confidences.push_back(confidenceInClass);
                    boxes.push_back(rect);
                }
            }
        }
    }
}

// Full detection pipeline: preprocess -> CoreML inference -> feature
// decoding -> non-maximum suppression.
// Appends one DetectionInfo (name, confidence, box) per surviving box to
// `detectionResults`. Returns 0 on success, -1 when `image` is empty.
int CDetectObject::implDetection(const cv::Mat& image, std::vector<DetectionInfo>& detectionResults){
    
    if (image.empty()) {
        NSLog(@"Error! image is empty!");
        return -1;
    }
    
    // Resize the frame to the network input resolution.
    cv::Mat netInput;
    preprocessImage(image, netInput);
    
    // Run the model and decode the raw feature map into candidate boxes.
    MLMultiArray* feature = predictImageScene(netInput);
    std::vector<int> classIds;
    std::vector<float> scores;
    std::vector<cv::Rect> candidates;
    parseFeature(feature, classIds, scores, candidates);
    
    // Non-maximum suppression: keep only the best box among overlaps.
    std::vector<int> keptIndices;
    cv::dnn::NMSBoxes(candidates, scores, confidenceThreshold, nmsThreshold, keptIndices);
    
    // Package the surviving detections for the caller.
    for (int kept : keptIndices) {
        DetectionInfo objectInfo;
        objectInfo.name = classes[classIds[kept]];
        objectInfo.confidence = scores[kept];
        objectInfo.box = candidates[kept];
        detectionResults.push_back(objectInfo);
    }
    
    return 0;
}


